import json
import re
import os
import urllib.request
from urllib.parse import urlparse, urlunparse
import datetime

# JSON export that holds the posts to scan for videos.
JSON_PATH = 'classPhotoByUserId.json'

# Captures the value of a data-url attribute that points at an .mp4 file.
# The optional tail lets URLs carrying a query string or fragment
# (e.g. "x.mp4?sign=abc") match too; the tail is stripped afterwards.
VIDEO_URL_PATTERN = re.compile(r'data-url="([^"]+\.mp4(?:[?#][^"]*)?)"')


def load_posts(path):
    """Read and parse the UTF-8 encoded JSON file at *path*.

    Args:
        path: Location of the JSON file.

    Returns:
        The decoded JSON document.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)


def extract_video_urls(data):
    """Collect the unique .mp4 URLs referenced by the posts in *data*.

    Every entry of ``data['data']['list']`` is scanned for ``data-url``
    attributes inside its ``content`` string. Params, query string and
    fragment are removed from each URL before de-duplication.

    Args:
        data: Parsed JSON document containing ``data -> list``.

    Returns:
        A list of cleaned URLs in first-seen order, without duplicates.

    Raises:
        KeyError: If ``data['data']['list']`` is missing.
    """
    cleaned = []
    for item in data['data']['list']:
        content = item.get('content')
        if not isinstance(content, str):
            # Entries without textual content cannot reference a video.
            continue
        for url in VIDEO_URL_PATTERN.findall(content):
            parsed = urlparse(url)
            cleaned.append(
                urlunparse((parsed.scheme, parsed.netloc, parsed.path, '', '', ''))
            )
    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # download order (and the progress numbering) is stable between runs.
    return list(dict.fromkeys(cleaned))


def unique_filename(url, index, taken):
    """Choose a local file name for *url* that is not yet in *taken*.

    Args:
        url: The video URL; the last component of its path is the base name.
        index: 1-based position of the URL, used for a fallback name when
            the URL path has no final component.
        taken: Set of names already handed out in this run; the chosen
            name is added to it.

    Returns:
        The file name to save the video under.
    """
    name = os.path.basename(urlparse(url).path) or f"video_{index}.mp4"
    if name in taken:
        # Different URLs can share a base name; add a numeric suffix rather
        # than letting the later download overwrite the earlier one.
        stem, ext = os.path.splitext(name)
        counter = 1
        while f"{stem}_{counter}{ext}" in taken:
            counter += 1
        name = f"{stem}_{counter}{ext}"
    taken.add(name)
    return name


def download_videos(video_urls):
    """Download every URL in *video_urls* into the current directory.

    Each file is first written to ``<name>.part`` and renamed once the
    transfer has finished, so an interrupted download never leaves a
    truncated file under the final name. A failure is reported and the
    remaining URLs are still attempted.

    Args:
        video_urls: URLs to download.

    Returns:
        The number of downloads that failed.
    """
    total = len(video_urls)
    taken = set()
    failed = 0
    for i, url in enumerate(video_urls, start=1):
        filename = unique_filename(url, i, taken)
        partial = filename + '.part'
        print(f"正在下载视频 {i}/{total}: {filename}")
        try:
            urllib.request.urlretrieve(url, partial)
            os.replace(partial, filename)
        except Exception as e:
            # Per-item boundary: one bad URL must not abort the whole batch.
            failed += 1
            print(f"下载失败 {url}: {e}")
            try:
                os.remove(partial)
            except OSError:
                # Nothing was written, or cleanup is impossible; the
                # failure itself has already been reported above.
                pass
        else:
            print(f"已下载 {i}/{total}: {filename}")
    return failed


def main():
    """Extract all video URLs from the JSON export and download them."""
    data = load_posts(JSON_PATH)
    video_urls = extract_video_urls(data)

    print(f"找到 {len(video_urls)} 个唯一的视频URL")

    if not video_urls:
        print("未找到视频文件")
        return

    failed = download_videos(video_urls)
    if failed:
        # Do not claim that everything was saved when some downloads failed.
        print(f"下载完成: 成功 {len(video_urls) - failed} 个, 失败 {failed} 个")
    else:
        print("所有视频已保存到当前文件夹中")


if __name__ == "__main__":
    main()